import pyspark.sql.functions as F
from pyspark.sql.types import *
import json,boto3
from datetime import datetime,timedelta
import pandas as pd
# get daily event count
def get_date(dt_in=None):
try:
temp = datetime.strptime(dt_in[0:19], '%Y-%m-%dT%H:%M:%S')
return str(temp.date())
except:
return None
get_dateUDF = F.udf(get_date,StringType())
%%local
import plotly.express as px
df_all = spark.read.option("header",True).csv("s3://mist-data-science-dev/may/roaming_exploration/daily_event_count")\
.filter(F.col("date")>='2021-02-09')\
.select("date",F.col("event_count").cast(LongType()),"event_type",F.col("count_distinct_wc").cast(LongType())) #F.to_date(F.col("date").cast(TimestampType())).alias("date")
+----------+-----------+-------------+-----------------+ | date|event_count| event_type|count_distinct_wc| +----------+-----------+-------------+-----------------+ |2021-02-16| 678542|sticky-client| 36110| |2021-02-09| 50159|sticky-client| 3156| |2021-02-18| 750229|sticky-client| 37703| |2021-02-10| 585622|sticky-client| 29740| |2021-02-13| 675713|sticky-client| 29465| |2021-02-14| 328811|sticky-client| 24395| |2021-02-15| 590071|sticky-client| 30906| |2021-02-17| 701604|sticky-client| 36645| |2021-02-20| 83597|sticky-client| 9848| |2021-02-19| 815166|sticky-client| 38330| |2021-02-12| 797668|sticky-client| 35167| |2021-02-11| 701694|sticky-client| 34820| |2021-02-16| 116597768| client-roam| 3332797| |2021-02-09| 42766885| client-roam| 2146350| |2021-02-18| 123070329| client-roam| 3576829| |2021-02-10| 138311387| client-roam| 4007565| |2021-02-13| 140362282| client-roam| 4133301| |2021-02-14| 123084502| client-roam| 3555420| |2021-02-15| 112711903| client-roam| 3202255| |2021-02-17| 123758882| client-roam| 3595755| +----------+-----------+-------------+-----------------+ only showing top 20 rows
%%spark -o df_all
%%local
# Event Comparison
px.bar(df_all,x="date", y="event_count", color="event_type", barmode="group")
%%local
# Event Count in log scale
px.bar(df_all,x="date", y="event_count", color="event_type", barmode="group",log_y=True)
%%local
#Count Distinct WC comparison
px.bar(df_all,x="date", y="count_distinct_wc", color="event_type", barmode="group")
%%local
# Average Event Per Client
df_all["avg_event_per_WC"] = df_all[["count_distinct_wc","event_count"]].apply(lambda x:x[1]/x[0],axis=1)
px.bar(df_all,x="date", y="avg_event_per_WC", color="event_type", barmode="group")
# Get org Name
df_org= spark.read.parquet("s3://mist-secorapp-production/dimension/org/org.parquet").select(F.col("id").alias("OrgID"),"name")
df_org.show(3)
+--------------------+----------+ | OrgID| name| +--------------------+----------+ |e33ade80-f2d5-11e...| mist| |0fe71a27-54a6-4a4...|SuiteTreat| |58656846-1c35-11e...| Cisco| +--------------------+----------+ only showing top 3 rows
df_cr = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/org_client_roam/").select("OrgID","name","date",F.col("suboptimal_event_count").alias("client_roam_event_count"),"dt")
temp_cr = df_cr.groupby("OrgID","name").agg(F.sum("client_roam_event_count").alias("client_roam_event_count")).sort(F.desc("client_roam_event_count")).limit(10)
df_event_count = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/org_event/").join(df_org,"OrgID")
temp_sp = df_event_count.groupby("OrgID","name").agg(F.sum("suboptimal_event_count").alias("suboptimal_event_count")).sort(F.desc("suboptimal_event_count")).limit(10)
temp_pp = df_event_count.groupby("OrgID","name").agg(F.sum("pingpong_event_count").alias("pingpong_event_count")).sort(F.desc("pingpong_event_count")).limit(10)
df_sticky_client = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/sticky_client_flatten/").join(df_org,"OrgID")
df_sticky_client_count = df_sticky_client.groupby("OrgID","name","Band","date").agg(F.count("When").alias("sticky_client_event_count"))
temp_sc = df_sticky_client_count.groupby("OrgID","name").agg(F.sum("sticky_client_event_count").alias("sticky_client_event_count")).sort(F.desc("sticky_client_event_count")).limit(10)
df_all = temp_cr.join(temp_sp,["OrgID","name"],"outer").join(temp_pp,["OrgID","name"],"outer").join(temp_sc,["OrgID","name"],"outer").select("OrgID").distinct()
df_cr_total = df_cr.groupby("OrgID","name").agg(F.sum("client_roam_event_count").alias("client_roam_event_count")).join(df_all,"OrgID","inner")
df_sp_total = df_event_count.groupby("OrgID","name").agg(F.sum("suboptimal_event_count").alias("suboptimal_roam_event_count")).join(df_all,"OrgID","inner")
df_pp_total = df_event_count.groupby("OrgID","name").agg(F.sum("pingpong_event_count").alias("pingpong_roam_event_count")).join(df_all,"OrgID","inner")
df_sc_total = df_sticky_client_count.groupby("OrgID","name").agg(F.sum("sticky_client_event_count").alias("sticky_client_event_count")).join(df_all,"OrgID","inner")
df_final = df_cr_total.join(df_sp_total,["OrgID","name"],"outer").join(df_pp_total,["OrgID","name"],"outer").join(df_sc_total,["OrgID","name"],"outer")
df_final.show(2)
+--------------------+------+-----------------------+---------------------------+-------------------------+-------------------------+ | OrgID| name|client_roam_event_count|suboptimal_roam_event_count|pingpong_roam_event_count|sticky_client_event_count| +--------------------+------+-----------------------+---------------------------+-------------------------+-------------------------+ |39b04356-4cc3-41c...|Meijer| 16178104| 30576| 118| null| |b2e5fc49-d460-42b...|WeWork| 600387| 1351| 21| 361642| +--------------------+------+-----------------------+---------------------------+-------------------------+-------------------------+ only showing top 2 rows
%%spark -o df_final
%%local
import plotly.graph_objects as go
#x=['b', 'a', 'c', 'd']
fig = go.Figure(go.Bar(x=df_all["name"], y=df_all["client_roam_event_count"], name='client_roam'))
fig.add_trace(go.Bar(x=df_all["name"], y=df_all["suboptimal_event_count"], name='suboptimal_roam'))
fig.add_trace(go.Bar(x=df_all["name"], y=df_all["pingpong_event_count"], name='pingpong_roam'))
fig.add_trace(go.Bar(x=df_all["name"], y=df_all["sticky_client_event_count"], name='sticky_client'))
fig.update_yaxes(type="log")
fig.update_layout(barmode='stack')
fig.update_xaxes(categoryorder='category ascending')
fig.show()
%%local
fig = px.bar(df_final,x="name",\
y=["client_roam_event_count","suboptimal_roam_event_count","pingpong_roam_event_count","sticky_client_event_count"],barmode="group",title="Org with Top 10 Event Counts",log_y=True)
fig.update_layout(
xaxis_title="Org Name",
yaxis_title="Log(event count)",
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()
df_cr = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/org_client_roam/").select("OrgID","name","date",F.col("suboptimal_event_count").alias("client_roam_event_count"),"dt")
temp_cr = df_cr.groupby("OrgID","name").agg(F.sum("client_roam_event_count").alias("client_roam_event_count")).sort(F.desc("client_roam_event_count")).limit(10)
temp_cr.show()
df_cr_top_10 = df_cr.join(temp_cr.select("OrgID"),"OrgID")
+--------------------+--------------------+-----------------------+ | OrgID| name|client_roam_event_count| +--------------------+--------------------+-----------------------+ |bbb101eb-b62d-4fb...| US - WALMART| 816422056| |56de201d-e63b-431...| AmazonOTFC-Prod| 133845317| |604411f1-4e45-4be...| US - SAM'S| 115557040| |d8cec22e-e0c2-11e...| AmazonProd| 104128909| |d9ca39f2-a2cd-40a...| US - DC| 56236782| |5a578f3c-f094-4d1...| AmazonOTN19-Prod| 36533661| |c1cac1c4-1753-4dd...|Gap North America...| 17049655| |39b04356-4cc3-41c...| Meijer| 16178104| |22f1cc2d-ea8a-47e...| Target Corporation| 12786259| |0f2adb1c-8843-448...| Whole Foods Market| 12437717| +--------------------+--------------------+-----------------------+
%%spark -o df_cr_top_10
temp_cr.show()
+--------------------+--------------------+-----------------------+ | OrgID| name|client_roam_event_count| +--------------------+--------------------+-----------------------+ |bbb101eb-b62d-4fb...| US - WALMART| 816422056| |56de201d-e63b-431...| AmazonOTFC-Prod| 133845317| |604411f1-4e45-4be...| US - SAM'S| 115557040| |d8cec22e-e0c2-11e...| AmazonProd| 104128909| |d9ca39f2-a2cd-40a...| US - DC| 56236782| |5a578f3c-f094-4d1...| AmazonOTN19-Prod| 36533661| |c1cac1c4-1753-4dd...|Gap North America...| 17049655| |39b04356-4cc3-41c...| Meijer| 16178104| |22f1cc2d-ea8a-47e...| Target Corporation| 12786259| |0f2adb1c-8843-448...| Whole Foods Market| 12437717| +--------------------+--------------------+-----------------------+
%%local
px.bar(df_cr_top_10.groupby(["name","date"]).client_roam_event_count.sum().reset_index(),x="date",y="client_roam_event_count",color="name",barmode="group",title="Org with Top 10 Client Roam Event Counts")
df_event_count = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/org_event/").join(df_org,"OrgID")
temp_sp = df_event_count.groupby("OrgID","name").agg(F.sum("suboptimal_event_count").alias("suboptimal_event_count")).sort(F.desc("suboptimal_event_count")).limit(10)
df_suboptimal_roam_count = df_event_count.join(temp_sp.select("OrgID"),"OrgID")
temp_pp = df_event_count.groupby("OrgID","name").agg(F.sum("pingpong_event_count").alias("pingpong_event_count")).sort(F.desc("pingpong_event_count")).limit(10)
df_pingpong_event_count = df_event_count.join(temp_pp.select("OrgID"),"OrgID")
%%spark -o df_suboptimal_roam_count
temp_sp.show()
+--------------------+--------------------+----------------------+ | OrgID| name|suboptimal_event_count| +--------------------+--------------------+----------------------+ |bbb101eb-b62d-4fb...| US - WALMART| 1467138| |604411f1-4e45-4be...| US - SAM'S| 335045| |0f2adb1c-8843-448...| Whole Foods Market| 134964| |c1cac1c4-1753-4dd...|Gap North America...| 87467| |d9ca39f2-a2cd-40a...| US - DC| 62965| |d8cec22e-e0c2-11e...| AmazonProd| 48580| |56de201d-e63b-431...| AmazonOTFC-Prod| 33444| |39b04356-4cc3-41c...| Meijer| 30576| |22f1cc2d-ea8a-47e...| Target Corporation| 24659| |e1bde705-bd39-408...| Michaels| 21148| +--------------------+--------------------+----------------------+
%%local
px.bar(df_suboptimal_roam_count.groupby(["name","date"]).suboptimal_event_count.sum().reset_index(),x="date",y="suboptimal_event_count",color="name",barmode="group",title="Org with Top 10 Suboptimal Roam Event Counts")
%%spark -o df_pingpong_event_count
temp_pp.show()
+--------------------+--------------------+--------------------+ | OrgID| name|pingpong_event_count| +--------------------+--------------------+--------------------+ |bbb101eb-b62d-4fb...| US - WALMART| 17309| |52ca4540-62b5-4ed...|Nambucca Valley Care| 2592| |56de201d-e63b-431...| AmazonOTFC-Prod| 2572| |d9ca39f2-a2cd-40a...| US - DC| 2553| |2eed5700-cb1e-425...| SYSTEMTECH| 2143| |604411f1-4e45-4be...| US - SAM'S| 1717| |d8cec22e-e0c2-11e...| AmazonProd| 1616| |790be815-af8f-4dc...| Dartmouth College| 1136| |3098d079-88b7-438...| GAG Group| 864| |5a578f3c-f094-4d1...| AmazonOTN19-Prod| 681| +--------------------+--------------------+--------------------+
%%local
px.bar(df_pingpong_event_count.groupby(["name","date"]).suboptimal_event_count.sum().reset_index(),x="date",y="suboptimal_event_count",color="name",barmode="group",title="Org with Top 10 PingPong Roam Event Counts")
df_sticky_client = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/sticky_client_flatten/").join(df_org,"OrgID")
df_sticky_client_count = df_sticky_client.groupby("OrgID","name","Band","date").agg(F.count("When").alias("sticky_client_event_count"))
temp_sc = df_sticky_client_count.groupby("OrgID","name").agg(F.sum("sticky_client_event_count").alias("sticky_client_event_count")).sort(F.desc("sticky_client_event_count")).limit(10)
df_sticky_client_top_10 = df_sticky_client_count.join(temp_sc.select("OrgID"),"OrgID","inner")
%%spark -o df_sticky_client_top_10
temp_sc.show()
+--------------------+--------------------+-------------------------+ | OrgID| name|sticky_client_event_count| +--------------------+--------------------+-------------------------+ |4f7ce014-2148-4d4...| CFA-Comcast| 11893535| |0992350f-e897-471...|Massachusetts Ins...| 5034042| |a15b4c50-378e-410...| Bunnings| 1160525| |758a1f7f-6d65-4d8...| REI Retail| 604490| |9de2eacf-bc8a-46c...| Dave & Buster's| 573004| |2cd327d0-d323-4be...| Dicker Data Kurnell| 550419| |20101562-d4f8-41a...| Shopper Media Group| 423377| |b2e5fc49-d460-42b...| WeWork| 361642| |4a7f417d-a4b1-445...|Wonderful Pistach...| 217309| |a5f0b940-965c-486...| RTG| 211464| +--------------------+--------------------+-------------------------+
%%local
px.bar(df_sticky_client_top_10.groupby(["name","date"]).sticky_client_event_count.sum().reset_index(),x="date",y="sticky_client_event_count",color="name",barmode="group",title="Org with Top 10 Sticky Client Event Counts")
df_AssocFail = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/client_roam_AssocFail/")
df_AssocFail.groupby("AssocFailReason","Roamtype").agg(F.sum("event_count").alias("event_count")).show()
+---------------+---------+-----------+ |AssocFailReason| Roamtype|event_count| +---------------+---------+-----------+ | 12| | 1| | 1|slow_roam| 46| | 1| | 3| +---------------+---------+-----------+
%%spark -o df_AssocFail
%%local
df_AssocFail["AssocFailReason"] = df_AssocFail["AssocFailReason"].astype(str)
px.bar(df_AssocFail.groupby(["date","AssocFailReason","Roamtype"]).event_count.sum().reset_index(),x="date",y="event_count",color = "AssocFailReason",barmode="group",title="Client Roam Association Fail",hover_data=['Roamtype'])
df_AuthFail = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/client_roam_AuthFail/")
df_AuthFail.groupby("AuthFailReason","Roamtype").agg(F.sum("event_count").alias("event_count")).sort(F.desc("event_count")).show()
+--------------+---------+-----------+ |AuthFailReason| Roamtype|event_count| +--------------+---------+-----------+ | 258| | 694134| | 258|slow_roam| 413726| | 0|fast_roam| 319088| | 23| | 297383| | 23|slow_roam| 66129| | 16| | 52893| | 15|slow_roam| 35127| | 15| | 30741| | 16|fast_roam| 17658| | 23|fast_roam| 16753| | 0| | 9699| | 16|slow_roam| 6039| | 17|slow_roam| 4337| | 258|fast_roam| 3883| | 14| | 2375| | 2| | 2018| | 512| | 1670| | 17| | 1307| | 2|slow_roam| 1255| | 14|slow_roam| 1031| +--------------+---------+-----------+ only showing top 20 rows
df_AuthFail.groupby("OrgID").agg(F.sum("event_count")).join
%%spark -o df_AuthFail
%%local
df_AuthFail["AuthFailReason"] = df_AuthFail["AuthFailReason"].astype(str)
px.bar(df_AuthFail.groupby(["date","AuthFailReason","Roamtype"]).event_count.sum().reset_index(),x="date",y="event_count",color = "AuthFailReason",barmode="group",title="Client Roam Auth Fail",hover_data=['Roamtype'])
df_FastRoamFail = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/client_roam_FastRoamFail/")
df_FastRoamFail.groupby("FastRoamFailCause","Roamtype").agg(F.sum("event_count").alias("event_count")).sort(F.desc("event_count")).show()
+-----------------+---------+-----------+ |FastRoamFailCause| Roamtype|event_count| +-----------------+---------+-----------+ | 79|fast_roam| 286429| | 53|fast_roam| 44825| | 53|slow_roam| 511| | 79|slow_roam| 258| | 1|fast_roam| 18| | 1|slow_roam| 1| +-----------------+---------+-----------+
%%spark -o df_FastRoamFail
%%local
df_FastRoamFail["FastRoamFailCause"] = df_FastRoamFail["FastRoamFailCause"].astype(str)
px.bar(df_FastRoamFail.groupby(["date","FastRoamFailCause","Roamtype"]).event_count.sum().reset_index(),x="date",y="event_count",color = "FastRoamFailCause",barmode="group",title="Client Roam Fast Roam Fail Cause",hover_data=['Roamtype'])
%%spark -o temp
%%local
px.bar(temp,x="date",y="event_count",color="Band",barmode="group",title = "Event Count of FastRoamFail.")
df_cr = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/client_roam_latency/").filter(F.col("Roamtype").isin("slow_roam","fast_roam"))
df_cr.show(5)
+---------+----+--------+------+----------+-----------------+-----------+------------------+--------------+--------------+------------------+---------------+---------------+ | Roamtype|Band|PrevBand|SameAP| date|count_Distinct_WC|event_count|median_RoamLatency|RoamLatency_75|RoamLatency_95| mean_RoamLatency|max_RoamLatency|min_RoamLatency| +---------+----+--------+------+----------+-----------------+-----------+------------------+--------------+--------------+------------------+---------------+---------------+ |slow_roam| 2.4| 5| true|2021-02-20| 5354| 9316| 0.028| 0.067| 1.029| 6004.659035030513| 1.9154175698E7| 0.0| |slow_roam| 2.4| 2.4| true|2021-02-16| 11| 11| 0.018| 0.653| 16.8095| 3.184272727272727| 30.49| 0.007| |slow_roam| 5| | false|2021-02-19| 7097| 57045| 0.211| 0.238| 0.266|26.559768536172975| 713304.825| 0.0| |slow_roam| 5| 2.4| false|2021-02-09| 350800| 1133499| 0.033| 0.063| 1.3|1044.7368138185554| 1.8280655343E7| 0.0| |fast_roam| 2.4| 5| false|2021-02-09| 15504| 55472| 0.015| 0.023| 0.079|246.78067160334464| 1154051.854| 0.005| +---------+----+--------+------+----------+-----------------+-----------+------------------+--------------+--------------+------------------+---------------+---------------+ only showing top 5 rows
%%spark -o df_cr
%%local
df_cr["Band_change"] = df_cr[["Band","PrevBand"]].apply(lambda x: "{} to {}".format(x[1],x[0]),axis=1)
#df_cr.head()
px.bar(df_cr[df_cr["SameAP"]==True].groupby(["date","Band_change","Roamtype"]).event_count.sum().reset_index(),x="date", y="event_count", color="Band_change", barmode="group",title="Client Roam to Same AP",hover_data=['Roamtype', 'event_count'])
# roam to same AP
df_croam.filter((F.col("FromAP")==F.col("ToAP")) & (F.col("Band")==F.col("PrevBand"))).show(5)
+---------+---------------+--------+--------+--------------+--------+----+------------+------------+-----------------+-----------------+--------------------+------------------+--------+---------+-----------+----------+-------------------+--------------------+-----+-----------+-----------------+-------+-----------------+--------------------+--------------------+ |AssocFail|AssocFailReason|AuthAlgo|AuthFail|AuthFailReason|AuthType|Band|Capabilities|FastRoamFail|FastRoamFailCause| FromAP| OrgID|PotentialCandidate|PrevBand| Roamtype| SSID|SinceAssoc| SinceAuth| SiteID|State| TimeDelta| ToAP|Version| WC| When| WlanID| +---------+---------------+--------+--------+--------------+--------+----+------------+------------+-----------------+-----------------+--------------------+------------------+--------+---------+-----------+----------+-------------------+--------------------+-----+-----------+-----------------+-------+-----------------+--------------------+--------------------+ | false| 0| 0| false| 0| open| 5| 0| false| 0|d4-20-b0-c0-ad-1b|b4f71ea9-1c74-4b6...| false| 5|slow_roam|DPS_Visitor| 0|9.223372036854776E9|8d365b0a-4333-449...| true| 1081451591|d4-20-b0-c0-ad-1b| 1|8a-cf-5e-0d-91-f5|2021-02-09T17:43:...|82e9e433-ccf1-442...| | false| 0| 2| false| 0| psk11r| 5| 128| false| 0|5c-5b-35-50-20-bc|5cc036bb-1f32-4f3...| false| 5|fast_roam| TVG| null| 0.007939465|d6ece5ff-f8cc-437...| true|32502450647|5c-5b-35-50-20-bc| 1|22-3d-fc-da-81-6a|2021-02-09T17:43:...|0b772fa9-84ec-438...| | false| 0| 0| false| 0| open| 5| 0| false| 0|d4-20-b0-c0-ad-1b|b4f71ea9-1c74-4b6...| false| 5|slow_roam|DPS_Visitor| 0|9.223372036854776E9|8d365b0a-4333-449...| true|11680282160|d4-20-b0-c0-ad-1b| 1|8a-cf-5e-0d-91-f5|2021-02-09T17:43:...|82e9e433-ccf1-442...| | false| 0| 0| false| 0| open| 5| 0| false| 0|d4-20-b0-c0-ad-75|b4f71ea9-1c74-4b6...| false| 5|slow_roam|DPS_Visitor| 0|9.223372036854776E9|8d365b0a-4333-449...| true| 4525518938|d4-20-b0-c0-ad-75| 1|8a-cf-5e-0d-91-f5|2021-02-09T17:44:...|82e9e433-ccf1-442...| | false| 0| 0| false| 0| psk| 5| 0| false| 0|5c-5b-35-d0-d4-bc|d6a1ec7f-a046-4d6...| false| 5|slow_roam| Walnut| null| 1.020448954|a40b5a6d-8d11-460...| true| 407402845|5c-5b-35-d0-d4-bc| 1|5e-a9-96-d1-63-66|2021-02-09T17:58:...|bb84c133-0b61-451...| +---------+---------------+--------+--------+--------------+--------+----+------------+------------+-----------------+-----------------+--------------------+------------------+--------+---------+-----------+----------+-------------------+--------------------+-----+-----------+-----------------+-------+-----------------+--------------------+--------------------+ only showing top 5 rows
%%local
df_cr["Band_change"] = df_cr[["Band","PrevBand"]].apply(lambda x: "{} to {}".format(x[1],x[0]),axis=1)
#df_cr.head()
px.bar(df_cr[df_cr["SameAP"]==False].groupby(["date","Band_change","Roamtype"]).event_count.sum().reset_index(),x="date", y="event_count", color="Band_change", barmode="group",title="Client Roam to Different AP",hover_data=['Roamtype', 'event_count'])
%%local
px.bar(df_cr.groupby(["Band_change","Roamtype","SameAP"]).median_RoamLatency.mean().reset_index(),x="Band_change", y="median_RoamLatency", color="SameAP", barmode="group",title="Median Roaming Latency",hover_data=['Roamtype', 'SameAP'])
df_cr.filter(F.col("SameAP")==True).sort(F.desc("RoamLatency_95")).show()
+---------+----+--------+------+----------+-----------------+-----------+--------------------+-------------------+------------------+------------------+----------------+---------------+ | Roamtype|Band|PrevBand|SameAP| date|count_Distinct_WC|event_count| median_RoamLatency| RoamLatency_75| RoamLatency_95| mean_RoamLatency| max_RoamLatency|min_RoamLatency| +---------+----+--------+------+----------+-----------------+-----------+--------------------+-------------------+------------------+------------------+----------------+---------------+ |slow_roam| 2.4| 2.4| true|2021-02-10| 9| 10| 0.0155| 0.03775| 679.4622999999988|124.09969999999998| 1210.78| 0.008| |slow_roam| 5| 2.4| true|2021-02-14| 121114| 593416| 0.029| 0.447| 69.28895| 16581.88331065381|1.613340787062E9| 0.0| |slow_roam| 5| 2.4| true|2021-02-13| 139781| 691900| 0.028| 0.418| 65.40479999999982|16176.425693717007|1.613218396687E9| 0.0| |slow_roam| 5| 2.4| true|2021-02-19| 168808| 883373| 0.027| 0.302| 59.54759999999986|19216.169862918014|1.613771061446E9| 0.0| |slow_roam| 5| 2.4| true|2021-02-20| 6709| 11091| 0.028|0.40874999999999995| 56.41619999999999|10739.144987986632| 2.0163135987E7| 0.0| |slow_roam| 5| 2.4| true|2021-02-12| 171651| 894795| 0.027| 0.285|55.581099999999864| 18156.11396634966| 1.6131635232E9| 0.0| |slow_roam| 5| 2.4| true|2021-02-09| 84424| 302173| 0.027| 0.309| 49.70619999999999|3204.2414355861215| 2.1013274205E7| 0.0| |slow_roam| 5| 2.4| true|2021-02-15| 132240| 676711| 0.028| 0.338| 48.94514999999996| 10958.17332142937|1.613403951281E9| 0.0| |slow_roam| 5| 2.4| true|2021-02-16| 153133| 831324| 0.027| 0.236| 44.955| 6486.135178903683| 3.171263531E7| 0.0| |slow_roam| 5| 2.4| true|2021-02-17| 166072| 887891| 0.027| 0.227| 42.31314999999997| 8771.003002081856|1.613604858282E9| 0.0| |slow_roam| 5| 2.4| true|2021-02-11| 175063| 929922| 0.027| 0.206| 42.15284999999986|10677.346491596187|1.613080711166E9| 0.0| |slow_roam| 5| 2.4| true|2021-02-18| 164975| 880116| 0.027| 0.213| 42.09279999999999|25150.693631628008|1.613661160868E9| 0.0| |slow_roam| 5| 2.4| true|2021-02-10| 178496| 942220| 0.027| 0.21| 41.312| 4151.777881738804| 2.9801002192E7| 0.0| |slow_roam| 2.4| 2.4| true|2021-02-19| 12| 13| 0.029| 0.103|31.161599999999993| 4.875| 32.808| 0.01| |fast_roam| 2.4| 2.4| true|2021-02-09| 10| 10|0.013000000000000001| 22.52375| 30.03825| 9.0187| 30.045| 0.008| |slow_roam| 2.4| 2.4| true|2021-02-17| 14| 14| 0.062| 0.27475| 30.03725| 6.487142857142858| 30.073| 0.008| |slow_roam| 2.4| 2.4| true|2021-02-15| 9| 9| 0.017| 30.01| 30.0368|10.245444444444445| 30.046| 0.006| |slow_roam| 2.4| 2.4| true|2021-02-18| 13| 18| 0.024| 2.4044999999999996|30.025000000000002|4.0357777777777795| 30.11| 0.008| |slow_roam| 5| 5| true|2021-02-12| 465| 821| 0.127| 1.094|29.869799999999994|2055.5483465211464| 94989.49| 0.0| |slow_roam| 2.4| 2.4| true|2021-02-09| 3| 3| 8.78| 19.4115|27.916699999999995|12.946333333333333| 30.043| 0.016| +---------+----+--------+------+----------+-----------------+-----------+--------------------+-------------------+------------------+------------------+----------------+---------------+ only showing top 20 rows
df_sr= spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/total_suboptimal_roam_rssi_diff/")
df_sr.show()
+------+-----------------+-----------+----------------+------------+------------+-------------------+-------------+-------------+ |SameAP|count_distinct_wc|event_count|median_RSSI_diff|RSSI_diff_75|RSSI_diff_95| mean_RSSI_diff|max_RSSI_diff|min_RSSI_diff| +------+-----------------+-----------+----------------+------------+------------+-------------------+-------------+-------------+ | true| 64646| 282780| -12.0| -9.0| -7.0|-13.157627837895184| -7| -86| | false| 457452| 945583| -12.0| -9.0| -7.0| -13.82344648751088| -7| -91| +------+-----------------+-----------+----------------+------------+------------+-------------------+-------------+-------------+
df_sr = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/suboptimal_roam_rssi_diff/")
df_sr.show(10)
+----------+------+-----------------+-----------+----------------+------------+------------+-------------------+-------------+-------------+ | date|SameAP|count_distinct_wc|event_count|median_RSSI_diff|RSSI_diff_75|RSSI_diff_95| mean_RSSI_diff|max_RSSI_diff|min_RSSI_diff| +----------+------+-----------------+-----------+----------------+------------+------------+-------------------+-------------+-------------+ |2021-02-17| false| 24918| 31700| -12.0| -9.0| -7.0| -14.01435331230284| -7| -76| |2021-02-07| true| 4124| 11572| -12.0| -9.0| -7.0|-13.162115451088836| -7| -76| |2021-02-20| false| 234| 238| -12.0| -9.0| -7.0|-13.617647058823529| -7| -32| |2021-02-18| true| 4760| 12610| -12.0| -9.0| -7.0|-13.141950832672482| -7| -75| |2021-02-13| true| 4885| 12943| -12.0| -9.0| -7.0|-13.181179015684153| -7| -82| |2021-01-31| false| 160| 216| -14.0| -9.0| -7.0| -15.12962962962963| -7| -43| |2021-02-09| false| 37810| 50148| -12.0| -9.0| -7.0|-13.690954773869347| -7| -75| |2021-02-14| false| 28087| 38685| -12.0| -9.0| -7.0|-13.787953987333593| -7| -71| |2021-02-12| false| 45990| 61000| -12.0| -9.0| -7.0|-13.804016393442623| -7| -89| |2021-02-03| false| 35844| 52047| -12.0| -9.0| -7.0|-14.037639057006167| -7| -72| +----------+------+-----------------+-----------+----------------+------------+------------+-------------------+-------------+-------------+ only showing top 10 rows
%%spark -o df_sr
%%local
# suboptimal event comp betwee same and different AP.
px.bar(df_sr.groupby(["date","SameAP"]).event_count.sum().reset_index(),x="date",y="event_count",color="SameAP",barmode="group",title="Suboptimal Event Count to Same AP vs Different AP")
#ping Pong roam raw data
input_files = "s3://mist-secorapp-production/client-pingpong-roaming/client-pingpong-roaming-production/dt=2021-02*/*/*.seq"
data_rdd = spark.sparkContext.sequenceFile(input_files).map(lambda r: json.loads(r[1]))
df_pp= spark.createDataFrame(data_rdd)
df_pp.printSchema()
df_pp.show(5)
root |-- FromAP: string (nullable = true) |-- NumRoams: long (nullable = true) |-- OrgID: string (nullable = true) |-- SiteID: string (nullable = true) |-- ToAP: string (nullable = true) |-- Version: long (nullable = true) |-- WC: string (nullable = true) |-- When: string (nullable = true) |-- Window: long (nullable = true) |-- WlanID: string (nullable = true) +-----------------+--------+--------------------+--------------------+-----------------+-------+-----------------+--------------------+------------+--------------------+ | FromAP|NumRoams| OrgID| SiteID| ToAP|Version| WC| When| Window| WlanID| +-----------------+--------+--------------------+--------------------+-----------------+-------+-----------------+--------------------+------------+--------------------+ |5c-5b-35-d2-4b-5d| 3|bbb101eb-b62d-4fb...|94e4e435-136b-49c...|d4-20-b0-c2-1b-06| 1|c6-4b-52-48-97-9c|2021-02-01T00:00:...|-10034829438|28cac5b0-43ca-4ae...| |5c-5b-35-d1-01-ad| 3|bbb101eb-b62d-4fb...|9b1b9cfc-a561-43d...|5c-5b-35-d1-01-4e| 1|3a-f6-5a-87-4f-b5|2021-02-01T00:00:...|-19738379302|5f318cfe-b216-4b8...| |5c-5b-35-af-1f-e9| 3|bbb101eb-b62d-4fb...|08216ecc-4dfa-494...|5c-5b-35-af-1e-22| 1|0c-2f-b0-c6-44-00|2021-02-01T00:08:...| -2592395537|5f318cfe-b216-4b8...| |d4-20-b0-81-47-e4| 3|d8cec22e-e0c2-11e...|d6d033b3-1274-48c...|d4-20-b0-81-46-b3| 1|8c-c8-4b-b1-13-9b|2021-02-01T00:11:...| -649432157|3d393407-d3db-42e...| |5c-5b-35-ae-b9-46| 3|604411f1-4e45-4be...|6c7657a9-8dc5-412...|5c-5b-35-ae-c2-4c| 1|70-ce-8c-a2-ce-c2|2021-02-01T00:21:...| -701470121|fa1b1bd8-b619-422...| +-----------------+--------+--------------------+--------------------+-----------------+-------+-----------------+--------------------+------------+--------------------+ only showing top 5 rows
df_pp.groupby("NumRoams").agg(F.count("WC")).show(10)
+--------+---------+ |NumRoams|count(WC)| +--------+---------+ | 3| 68387| +--------+---------+
df_pp = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/pingpong_roam_agg/").filter(F.col("IsPingPong")==True).withColumn("day",F.date_format("date", 'E'))
df_pp.show(5)
+----------+------+----------+-----------+------------------+--------------------------+----------------------+----------------------+------------------------+-----------------------+-----------------------+---+ | date|SameAP|IsPingPong|event_count|count_distinct_mac|median_pingpong_duration_s|pingpong_duration_s_75|pingpong_duration_s_95|mean_pingpong_duration_s|max_pingpong_duration_s|min_pingpong_duration_s|day| +----------+------+----------+-----------+------------------+--------------------------+----------------------+----------------------+------------------------+-----------------------+-----------------------+---+ |2021-02-12| true| true| 251| 214| 8.796026571| 24.950885563| 75.33606464600001| 19.346143137553785| 114.748611639| 0.006200507|Fri| |2021-02-16| true| true| 67| 62| 8.749670674| 25.993954756| 65.59251198349997| 19.050224249925375| 94.77926207| 0.028822597|Tue| |2021-02-08| true| true| 251| 216| 10.117249503| 20.318475960500002| 70.05992217400001| 17.57307894453386| 107.818753423| 0.248847088|Mon| |2021-02-11| false| true| 3384| 2833| 6.26386957| 17.5828943265| 61.77535126550001| 14.242095053413708| 119.219198043| 0.0|Thu| |2021-02-17| true| true| 100| 74| 12.2038319165| 39.97108211525| 90.68115202579999| 24.836056458410003| 102.744723159| 0.063464121|Wed| +----------+------+----------+-----------+------------------+--------------------------+----------------------+----------------------+------------------------+-----------------------+-----------------------+---+ only showing top 5 rows
%%spark -o df_pp
%%local
px.bar(df_pp.groupby(["date","SameAP"]).event_count.sum().reset_index(),x="date", y="event_count", color="SameAP", barmode="group")
%%local
# Pingpong duration
px.bar(df_pp.groupby(["date","SameAP"]).median_pingpong_duration_s.sum().reset_index(),x="date", y="median_pingpong_duration_s", color="SameAP", barmode="group")
# Sticky Client Raw Data
input_files = "s3://mist-secorapp-production/sticky-client/sticky-client-production/dt=2021-02-10/*/*.seq"
data_rdd = spark.sparkContext.sequenceFile(input_files).map(lambda r: json.loads(r[1])).filter(lambda r: r["Sticky"]==True)
df_sc= spark.createDataFrame(data_rdd)
df_sc.printSchema()
df_sc.show()
print(df_sc.limit(1).collect())
root
|-- Assoc: map (nullable = true)
| |-- key: string
| |-- value: string (valueContainsNull = true)
|-- RoamCandidates: array (nullable = true)
| |-- element: map (containsNull = true)
| | |-- key: string
| | |-- value: string (valueContainsNull = true)
|-- Sticky: boolean (nullable = true)
|-- Version: long (nullable = true)
|-- WC: string (nullable = true)
|-- When: string (nullable = true)
+--------------------+--------------------+------+-------+-----------------+--------------------+
| Assoc| RoamCandidates|Sticky|Version| WC| When|
+--------------------+--------------------+------+-------+-----------------+--------------------+
|[OrgID -> 4f7ce01...|[[RSSI -> -72, Ba...| true| 3|e2-9e-cb-1b-1a-a8|2021-02-10T00:45:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -67, Ba...| true| 3|ca-47-45-d4-5c-a7|2021-02-10T00:45:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -58, Ba...| true| 3|c0-84-7d-20-d6-ac|2021-02-10T00:45:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -53, Ba...| true| 3|ee-56-ae-ba-96-eb|2021-02-10T00:45:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -64, Ba...| true| 3|52-99-69-45-97-78|2021-02-10T00:45:...|
|[OrgID -> 0992350...|[[RSSI -> -62, Ba...| true| 3|f8-ff-c2-4e-24-d0|2021-02-10T00:45:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -64, Ba...| true| 3|c0-84-7d-c9-45-2e|2021-02-10T00:45:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -44, Ba...| true| 3|8c-c6-81-a1-19-d1|2021-02-10T00:45:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -61, Ba...| true| 3|da-33-cb-c3-96-12|2021-02-10T00:45:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -67, Ba...| true| 3|56-51-63-1f-f9-15|2021-02-10T00:45:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -70, Ba...| true| 3|ca-c5-e0-62-9c-56|2021-02-10T00:45:...|
|[OrgID -> a7cc116...|[[RSSI -> -60, Ba...| true| 3|2a-07-4a-f3-29-8e|2021-02-10T00:45:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -64, Ba...| true| 3|b2-a9-4a-7a-16-a5|2021-02-10T00:46:...|
|[OrgID -> 2cd327d...|[[RSSI -> -71, Ba...| true| 3|98-6c-f5-56-af-ba|2021-02-10T00:46:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -65, Ba...| true| 3|9e-d4-74-46-eb-7e|2021-02-10T00:46:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -70, Ba...| true| 3|ca-c5-e0-62-9c-56|2021-02-10T00:46:...|
|[OrgID -> 4f7ce01...|[[RSSI -> -64, Ba...| true| 3|26-c2-01-41-d6-65|2021-02-10T00:46:...|
|[OrgID -> 313cd17...|[[RSSI -> -63, Ba...| true| 3|00-14-a5-ca-19-a8|2021-02-10T00:46:...|
|[OrgID -> a15b4c5...|[[RSSI -> -64, Ba...| true| 3|5c-1d-d9-0d-a9-cc|2021-02-10T00:46:...|
|[OrgID -> 0992350...|[[RSSI -> -67, Ba...| true| 3|00-f6-20-57-c6-cf|2021-02-10T00:46:...|
+--------------------+--------------------+------+-------+-----------------+--------------------+
only showing top 20 rows
[Row(Assoc={'OrgID': '4f7ce014-2148-4d41-88f0-07c9c2e6f16c', 'SiteID': 'c8b285e1-bb09-4fa1-8e71-2c2e865cee2b', 'BSSID': '5c-5b-35-e1-41-c3', 'Band': '5', 'WLAN': '1fc8eb8a-56b0-4b9e-b2ae-2391d2be4eb8', 'AvgRSSI': '-77', 'SSID': 'iSPOTA1', 'AP': '5c-5b-35-d0-46-b9'}, RoamCandidates=[{'RSSI': '-72', 'Band': '5', 'BSSID': '5c-5b-35-e1-09-23', 'AP': '5c-5b-35-d0-40-d3'}], Sticky=True, Version=3, WC='e2-9e-cb-1b-1a-a8', When='2021-02-10T00:45:48.484555937Z')]
df_sc = spark.read.parquet("s3://mist-data-science-dev/may/roaming_exploration/sticky_client_flatten/").withColumn("RSSI_diff",F.col("CurrentRSSI")-F.col("Candidate_RSSI"))
df_sc.show()
+-----------------+--------------------+--------------------+----+-----------+-----------------+--------------+--------------+-----------------+--------------------+----------+---------+ | WC| OrgID| SiteID|Band|CurrentRSSI| currentAP|Candidate_Band|Candidate_RSSI| Candidate_AP| When| date|RSSI_diff| +-----------------+--------------------+--------------------+----+-----------+-----------------+--------------+--------------+-----------------+--------------------+----------+---------+ |c0-84-7d-c9-58-40|4f7ce014-2148-4d4...|2b691141-0344-47f...| 5| -83|5c-5b-35-53-a0-c0| 5| -70|d4-20-b0-41-5c-f7|2021-02-10T18:00:...|2021-02-10| -13.0| |c0-84-7d-c9-58-40|4f7ce014-2148-4d4...|2b691141-0344-47f...| 5| -83|5c-5b-35-53-a0-c0| 2.4| -70|d4-20-b0-41-5c-f7|2021-02-10T18:00:...|2021-02-10| -13.0| |42-d6-33-3b-e3-46|4f7ce014-2148-4d4...|19216f53-1b26-4c6...| 5| -74|5c-5b-35-d0-5f-4b| 5| -65|5c-5b-35-d0-5f-5f|2021-02-10T18:00:...|2021-02-10| -9.0| |42-d6-33-3b-e3-46|4f7ce014-2148-4d4...|19216f53-1b26-4c6...| 5| -74|5c-5b-35-d0-5f-4b| 5| -69|5c-5b-35-d0-5f-41|2021-02-10T18:00:...|2021-02-10| -5.0| |f4-0f-24-2c-04-75|112db018-05a4-465...|def32bb1-a704-40b...| 5| -75|d4-20-b0-82-8e-c4| 2.4| -72|d4-20-b0-82-8c-ad|2021-02-10T18:00:...|2021-02-10| -3.0| |f4-0f-24-2c-04-75|112db018-05a4-465...|def32bb1-a704-40b...| 5| -75|d4-20-b0-82-8e-c4| 5| -72|d4-20-b0-82-8c-ad|2021-02-10T18:00:...|2021-02-10| -3.0| |10-2c-6b-0b-ef-9c|4f7ce014-2148-4d4...|fe7c7491-dd58-41f...| 5| -87|5c-5b-35-d0-3e-c6| 5| -61|5c-5b-35-d0-3e-c6|2021-02-10T18:00:...|2021-02-10| -26.0| |10-2c-6b-0b-ef-9c|4f7ce014-2148-4d4...|fe7c7491-dd58-41f...| 5| -87|5c-5b-35-d0-3e-c6| 2.4| -61|5c-5b-35-d0-3e-c6|2021-02-10T18:00:...|2021-02-10| -26.0| |42-e2-8c-c1-83-8d|4f7ce014-2148-4d4...|c8b285e1-bb09-4fa...| 5| -77|5c-5b-35-d0-46-b9| 5| -54|5c-5b-35-d0-40-d3|2021-02-10T18:00:...|2021-02-10| -23.0| |42-e2-8c-c1-83-8d|4f7ce014-2148-4d4...|c8b285e1-bb09-4fa...| 5| -77|5c-5b-35-d0-46-b9| 2.4| -54|5c-5b-35-d0-40-d3|2021-02-10T18:00:...|2021-02-10| -23.0| |10-2c-6b-1a-b6-54|4f7ce014-2148-4d4...|210ec126-ccaf-466...| 2.4| -77|d4-dc-09-00-4b-41| 5| -66|5c-5b-35-53-9f-b2|2021-02-10T18:00:...|2021-02-10| -11.0| |10-2c-6b-1a-b6-54|4f7ce014-2148-4d4...|210ec126-ccaf-466...| 2.4| -77|d4-dc-09-00-4b-41| 2.4| -66|5c-5b-35-53-9f-b2|2021-02-10T18:00:...|2021-02-10| -11.0| |10-2c-6b-3d-76-38|4f7ce014-2148-4d4...|c2dfe551-c57d-4f5...| 5| -81|5c-5b-35-53-a3-b3| 5| -72|5c-5b-35-d0-51-68|2021-02-10T18:00:...|2021-02-10| -9.0| |10-2c-6b-3d-76-38|4f7ce014-2148-4d4...|c2dfe551-c57d-4f5...| 5| -81|5c-5b-35-53-a3-b3| 2.4| -72|5c-5b-35-d0-51-68|2021-02-10T18:00:...|2021-02-10| -9.0| |10-2c-6b-0c-06-f0|4f7ce014-2148-4d4...|8d437e36-6850-4de...| 2.4| -78|5c-5b-35-d0-3a-ca| 5| -62|5c-5b-35-d0-3e-2b|2021-02-10T18:00:...|2021-02-10| -16.0| |10-2c-6b-0c-06-f0|4f7ce014-2148-4d4...|8d437e36-6850-4de...| 2.4| -78|5c-5b-35-d0-3a-ca| 2.4| -62|5c-5b-35-d0-3e-2b|2021-02-10T18:00:...|2021-02-10| -16.0| |8e-3b-85-e5-55-60|b2e5fc49-d460-42b...|33c09846-1937-4ff...| 5| -75|5c-5b-35-ce-a9-6f| 2.4| -65|5c-5b-35-ce-a8-ac|2021-02-10T18:00:...|2021-02-10| -10.0| |8e-3b-85-e5-55-60|b2e5fc49-d460-42b...|33c09846-1937-4ff...| 5| -75|5c-5b-35-ce-a9-6f| 5| -65|5c-5b-35-ce-a8-ac|2021-02-10T18:00:...|2021-02-10| -10.0| |f2-af-57-9c-80-80|4f7ce014-2148-4d4...|dcf07e2c-1613-429...| 5| -79|d4-20-b0-41-64-31| 5| -58|d4-20-b0-41-64-31|2021-02-10T18:00:...|2021-02-10| -21.0| |f2-af-57-9c-80-80|4f7ce014-2148-4d4...|dcf07e2c-1613-429...| 5| -79|d4-20-b0-41-64-31| 2.4| -58|d4-20-b0-41-64-31|2021-02-10T18:00:...|2021-02-10| -21.0| +-----------------+--------------------+--------------------+----+-----------+-----------------+--------------+--------------+-----------------+--------------------+----------+---------+ only showing top 20 rows
# top 10 orgID for sticky clients
df_sc.groupby("OrgID").agg(F.count("WC").alias("event_count"),F.countDistinct("WC").alias("count_distinct_WC")).withColumn("avg_event_per_WC", F.col("event_count")/F.col("count_distinct_WC")).sort(F.desc("event_count")).show(20,truncate=False)
+------------------------------------+-----------+-----------------+------------------+ |OrgID |event_count|count_distinct_WC|avg_event_per_WC | +------------------------------------+-----------+-----------------+------------------+ |4f7ce014-2148-4d41-88f0-07c9c2e6f16c|11166654 |29319 |380.86749206998877| |0992350f-e897-4719-8671-010a7e4ebf9c|4709050 |29471 |159.7858912150928 | |a15b4c50-378e-4103-974e-ea9014cd281b|1082509 |4640 |233.29935344827587| |758a1f7f-6d65-4d85-90b2-0775b8b476bc|579366 |5169 |112.08473592571097| |9de2eacf-bc8a-46cc-ad0e-e81780f1095b|552072 |13666 |40.39748280403922 | |2cd327d0-d323-4bef-b455-9dca175026b2|501272 |554 |904.8231046931407 | |20101562-d4f8-41aa-aaa9-98349d78dfa0|397515 |15076 |26.367405147253912| |b2e5fc49-d460-42b9-893c-27dbaff943aa|328867 |5677 |57.92971639950678 | |4a7f417d-a4b1-4452-804e-589a29dff85d|201088 |1965 |102.33486005089058| |a5f0b940-965c-4867-ab7c-ba93609872ab|199943 |1367 |146.2640819312363 | |5f71b73b-532f-4c07-b6c1-22cff8942b35|185413 |187 |991.5133689839572 | |7e37ab7a-f873-4247-aa57-d0cc22ad95ef|175999 |6902 |25.499710228919152| |fc35aa7d-7e64-4083-8aed-45730fdd7b43|125508 |430 |291.8790697674419 | |eff7b071-6671-4eb8-94b7-2a20303cf1b4|124537 |2110 |59.02227488151659 | |be47404e-6e38-4546-b93c-00c083622e75|105594 |1406 |75.10241820768137 | |313cd174-b2e1-40cf-8908-b4ba11a7c85f|68939 |891 |77.37261503928171 | |124630b3-a36e-423d-b7eb-e1af323eb906|61618 |632 |97.49683544303798 | |d81f3b9a-c090-499a-9fda-74c13d892a85|48294 |1755 |27.51794871794872 | |790be815-af8f-4dc5-b8c2-e01b8a107db4|39941 |838 |47.66229116945107 | |cd38e94a-9b9c-11e6-9e83-02e208b2d34f|34474 |3206 |10.75296319401123 | +------------------------------------+-----------+-----------------+------------------+ only showing top 20 rows
# count of sticky client per band
sticky_client_count = df_sc.groupby("When","WC","date","Band").agg(F.count("Candidate_AP").alias("RoamCandidate_count"))
sticky_client_count.groupby("RoamCandidate_count").agg(F.count("WC").alias("event_count")).sort(F.desc("event_count")).show(500)
# histogram of roamcandidate_count
+-------------------+-----------+ |RoamCandidate_count|event_count| +-------------------+-----------+ | 2| 5390022| | 1| 1330850| | 4| 885690| | 6| 319467| | 8| 152785| | 3| 125902| | 10| 48368| | 5| 43468| | 16| 40714| | 12| 28312| | 7| 23468| | 14| 17687| | 9| 1864| | 11| 1392| | 13| 1083| | 15| 261| +-------------------+-----------+
df_sc.groupby("Band").agg(F.count("WC").alias("event_count")).show()
temp = df_sc.groupby("date","Band").agg(F.count("WC").alias("event_count"))
+----+-----------+ |Band|event_count| +----+-----------+ | 2.4| 3387450| | | 74| | 5| 17937082| +----+-----------+
%%spark -o temp
%%local
## sticky client event count per band
temp["Band"] = temp["Band"].astype(str)
px.bar(temp,x="date",y="event_count",color="Band",barmode="group")
%%spark -o df_sc
%%local
#Sticky Client Current RSSI Histogram
px.histogram(df_sc,x="CurrentRSSI",title="Sticky Client Current RSSI",color = "Band")
%%local
#Sticky Client Current RSSI Histogram
px.histogram(df_sc,x="RSSI_diff",title="Sticky Client RSSI Diff between Current and Candidate APs",color = "Candidate_Band")
%%local
#Sticky Client Current RSSI Histogram
df_sc["Band_change"] = df_sc[["Band","Candidate_Band"]].apply(lambda x: "{} to {}".format(x[1],x[0]),axis=1)
df_sc["SameAP"] = df["CurrentAP"]
px.histogram(df_sc,x="RSSI_diff",title="Sticky Client RSSI Diff between Current and Candidate APs",color = "Band_change")
df_sc.withColumn("SameAP",F.col("CurrentAP")==F.col("Candidate_AP")).groupby("SameAP").agg(F.count("WC").alias("event_count")).show()
+------+-----------+ |SameAP|event_count| +------+-----------+ | true| 184945| | false| 4169606| +------+-----------+